{
 "cells": [
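  {
   "cell_type": "markdown",
   "id": "1547c023-b190-463f-9d3d-f0b0a4d71a39",
   "metadata": {},
   "source": [
    "- https://gist.github.com/vgel/55b845833af9f189ae1f834d5f7da8c3\n",
    "    - `logprob_inconsistency.py`: the excerpt below sends the same request twice and records the top-3 logprobs for each of the (at most 3) generated tokens\n",
    "\n",
    "\n",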
    "```python\n",
    "responses = []\n",
    "for _ in range(2):\n",
    "    resp = client.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=[\n",
    "            {\"role\": \"user\", \"content\": question},\n",
    "        ],\n",
    "        temperature=1.0,\n",
    "        max_tokens=3,\n",
    "        logprobs=True,\n",
    "        top_logprobs=3,\n",
    "    )\n",
    "    print(f\"{model} trial {_ + 1} chosen tokens:\", resp.choices[0].message.content)\n",
    "    responses.append(resp.choices[0].logprobs.content)\n",
    "```"
   ]
  },
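  {
   "cell_type": "markdown",
   "id": "dd2b9e4a-a215-41ac-bca5-f6e5a169e154",
   "metadata": {},
   "source": [
    "A log-probability converts back to a probability by exponentiation:\n",
    "\n",
    "$$\n",
    "\\exp(\\log p)=p\n",
    "$$\n",
    "\n",
    "For example, $\\exp(-0.66736) \\approx 0.5131$, i.e. the 51.31% shown next to that logprob in the output below."
   ]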
  },
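  {
   "cell_type": "markdown",
   "id": "652a4147-70a0-4b01-8d1e-b85fda19ff1c",
   "metadata": {},
   "source": [
    "Output of two identical requests per model (columns: token | probability | logprob; trial 1 on the left, trial 2 on the right). The first-token rows are conditioned on exactly the same prompt, yet their logprobs differ slightly between trials.\n",
    "\n",
    "```\n",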
    "gpt-4o-mini trial 1 chosen tokens: AABBA (A|AB|BA)\n",
    "gpt-4o-mini trial 2 chosen tokens: ABBAAB (AB|BA|AB)\n",
    "gpt-4o-mini trial 1                // gpt-4o-mini trial 2\n",
    "-----------------------------------//-----------------------------------\n",
    "- 'AB'       |  51.31% |  -0.66736 // 'AB'       |  51.92% |  -0.65544\n",
    "- 'BA'       |  31.12% |  -1.16736 // 'BA'       |  31.49% |  -1.15544\n",
    "- 'A'        |   8.92% |  -2.41736 // 'A'        |   9.02% |  -2.40544\n",
    "\n",
    "- 'AB'       |  99.07% |  -0.00932 // 'BA'       |  81.25% |  -0.20761\n",
    "- 'ABB'      |   0.86% |  -4.75932 // 'AB'       |  18.13% |  -1.70761\n",
    "- 'ABA'      |   0.07% |  -7.25932 // 'ABA'      |   0.33% |  -5.70761\n",
    "\n",
    "- 'BA'       |  92.01% |  -0.08323 // 'AB'       |  88.97% |  -0.11682\n",
    "- 'AB'       |   7.55% |  -2.58323 // 'A'        |   9.38% |  -2.36682\n",
    "- 'ABB'      |   0.23% |  -6.08323 // 'ABA'      |   1.27% |  -4.36682\n",
    "```\n",
    "\n",
    "```\n",
    "deepseek-chat trial 1 chosen tokens: ABABBA\n",
    "deepseek-chat trial 2 chosen tokens: ABABBA\n",
    "deepseek-chat trial 1              // deepseek-chat trial 2\n",
    "-----------------------------------//-----------------------------------\n",
    "- 'AB'       |  52.90% |  -0.63672 // 'AB'       |  56.94% |  -0.56308\n",
    "- 'Here'     |  46.58% |  -0.76399 // 'Here'     |  42.72% |  -0.85039\n",
    "- 'BA'       |   0.33% |  -5.70764 // 'ABA'      |   0.16% |  -6.42029\n",
    "\n",
    "- 'BA'       |  59.17% |  -0.52468 // 'BA'       |  60.01% |  -0.51058\n",
    "- 'AB'       |  39.07% |  -0.93970 // 'AB'       |  38.48% |  -0.95510\n",
    "- 'ABA'      |   1.75% |  -4.04636 // 'ABA'      |   1.49% |  -4.20826\n",
    "\n",
    "- 'BA'       |  99.92% |  -0.00077 // 'BA'       |  99.73% |  -0.00274\n",
    "- 'BB'       |   0.08% |  -7.16542 // 'BB'       |   0.27% |  -5.91770\n",
    "- 'AB'       |   0.00% | -13.57515 // 'AB'       |   0.00% | -10.23312\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf292eb2-b590-42f3-8e1e-355cbbdc7d8e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
